rm(list = ls()) # clear memory
#This is the link to the peru network graph from my GitHub repo:
peru_link = "https://github.com/DACSS690C/networks/raw/refs/heads/main/MyGraphs/peru.graphml"
# get network
peru = read_graph(peru_link, format='graphml')
#making sure to have the name labels
V(peru)$name=V(peru)$id
# let's draw the network
set.seed(111) # use this to get same results as me
plot.igraph(peru)
# set seed
set.seed(321)
peru_lv <- cluster_louvain(peru)
sizes(peru_lv) # returns the community sizes, in the order of their ids
## Community sizes
## 1 2 3 4 5 6 7 8 9 10
## 13 11 4 3 1 1 1 1 1 1
# plot the louvain algorithm
plot(peru_lv, peru,
layout = layout_with_kk, # plot with the Kamada-Kawai layout algorithm
# place the vertices on the plane, or in the 3d space, based on a phyisical model of springs.
main="Louvain Solution")
Let’s see the modularity score:
modularity(peru, membership(peru_lv), directed = FALSE)
## [1] 0.2886145
# compute the communities using the objective function
set.seed(123) # set seed
peru_ld <- cluster_leiden(peru,
objective_function ="modularity")
sizes(peru_ld)
## Community sizes
## 1 2 3 4 5 6 7 8 9 10
## 13 11 4 3 1 1 1 1 1 1
# plot the leiden algorithm
plot(peru_ld, peru,
layout = layout_with_kk, # plot with the Kamada-Kawai layout algorithm
main="Leiden Solution-using modularity")
Let’s see the modularity score:
modularity(peru, membership(peru_ld))
## [1] 0.2886145
The modularity score are the same for both methods. Thus, the strength of the community structures for both networks is the same for the Leiden method and Louvain method. You can choose either method.
#This is the link to the seattle network graph from my GitHub repo:
seattle_link = "https://github.com/DACSS690C/networks/raw/refs/heads/main/MyGraphs/seattle.graphml"
# get network
seattle = read_graph(seattle_link, format='graphml')
#making sure to have the name labels
V(seattle)$name=V(seattle)$id
# let's draw the network
set.seed(111) # use this to get same results as me
plot.igraph(seattle)
# CODE COMMENTED OUT SO CODE COULD BE KNIT INTO HTML
# # set seed
# set.seed(321)
#
# seattle_lv <- cluster_louvain(seattle)
# sizes(seattle_lv) # returns the community sizes, in the order of their ids
The Louvain method only works for undirected graphs, and the Seattle graph is directed as it represents an asymmetric relationship.
# CODE COMMENTED OUT SO CODE COULD BE KNIT INTO HTML
# compute the communities using the objective function
# set.seed(123) # set seed
# seattle_ld <- cluster_leiden(seattle,
# objective_function ="modularity")
#
# sizes(seattle_ld)
The Leiden method also did not work, as it is also only for use with undirected graphs.
set.seed(333)
seattle_gn <- cluster_edge_betweenness(seattle, directed = TRUE)
## Warning in cluster_edge_betweenness(seattle, directed = TRUE): At
## vendor/cigraph/src/community/edge_betweenness.c:504 : Membership vector will be
## selected based on the highest modularity score.
sizes(seattle_gn)
## Community sizes
## 1 2 3 4
## 42 2 1 1
# plot GM algorithm
plot(seattle_gn, seattle,
layout = layout_with_kk,
main="Girvan-Newman Solution")
Modularity score:
modularity(seattle, membership(seattle_gn), directed = TRUE)
## [1] 0.003039821
set.seed(333)
seattle_im <- cluster_infomap(seattle)
sizes(seattle_im)
## Community sizes
## 1
## 46
# plot
set.seed(332)
plot(seattle_im, seattle,
# vertex.label=V(seattle)$name,
layout = layout_nicely, vertex.label.cex=0.5,
main="Infomap solution")
Modularity score:
modularity(seattle, membership(seattle_im))
## [1] 0
sizes(cluster_label_prop(seattle, mode = 'out'))
## Community sizes
## 1
## 46
The modularity score for the infomap algorithm is 0, indicating that the the connections in the community are no better than random assignment. On the other hand, the modularity score for the Girvan-Newmann algorithm is 0.003039821, which, although still very low, is better than 0. Thus, the Girvan-Newmann algorithm should be chosen over the infomap algorithm.
#This is the link to the fifa network graph from my GitHub repo:
# graph that projects only the network of countries
fifa_link = "https://github.com/DACSS690C/networks/raw/refs/heads/main/MyGraphs/country_projected.graphml"
# get network
fifa = read_graph(fifa_link, format='graphml')
#making sure to have the name labels
V(fifa)$name=V(fifa)$id
# let's draw the network
set.seed(111) # use this to get same results as me
plot.igraph(fifa)
set.seed(333)
fifa_gn <- cluster_edge_betweenness(fifa, directed = FALSE)
## Warning in cluster_edge_betweenness(fifa, directed = FALSE): At
## vendor/cigraph/src/community/edge_betweenness.c:504 : Membership vector will be
## selected based on the highest modularity score.
sizes(fifa_gn)
## Community sizes
## 1 2 3 4 5 6 7 8 9 10
## 11 1 13 1 1 1 1 1 1 1
# plot GM algorithm
plot(fifa_gn, fifa,
layout = layout_with_kk,
main="Girvan-Newman Solution")
Modularity score:
modularity(fifa, membership(fifa_gn), directed = FALSE)
## [1] 0.01644719
# compute the communities using the objective function
set.seed(123) # set seed
fifa_ld <- cluster_leiden(fifa,
objective_function ="modularity")
sizes(fifa_ld)
## Community sizes
## 1 2 3
## 12 8 12
# plot the leiden algorithm
plot(fifa_ld, fifa,
layout = layout_with_kk, # plot with the Kamada-Kawai layout algorithm
main="Leiden Solution-using modularity")
Let’s see the modularity score:
modularity(fifa, membership(fifa_ld))
## [1] 0.04589626
Girvan-Newmann algorithm = 0.01644719 Leiden algorithm = 0.04589626
Since the modularity score for the Leiden algorithm is closer to 1 than the score for the Girvan-Newmann algorithm, it is stronger.
Thus, the Leiden algorithm should be chosen over the Girvan-Newmann algorithm.